---
title: "Power Analysis"
author: "Kaylyn Jackson Schiff, Daniel Schiff, and Natalia Bueno"
date: "2020"
output: pdf_document
editor_options: 
  chunk_output_type: console
---

#####Setup Chunk
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(rmarkdown)
library(tidyverse)
library(estimatr)
options(scipen=999)
```


#####Set Parameters for Power Analysis
```{r establish parameters for power analysis}
alpha <- 0.05 # Standard significance level 
possible.ate <- seq(from=0.01, to=0.3, by=.01) # The ATEs we'll be considering 
N <- 3500 # Our sample size
sims <- 1000 # Number of simulated treatment allocations to conduct for each ATE 

powers <- rep(NA, length(possible.ate)) # Empty object to collect lm simulation estimates 
powers.diff <- rep(NA, length(possible.ate)) # Empty object to collect lm simulation estimates 
```


####Simulate Results for Treatment Effect Differences with Support Outcome
```{r loop of lms to simulate results}
#Loop to vary the ATE
for (j in 1:length(possible.ate)){ ATE <- possible.ate[j] # Pick the ATE

Y0 <- rnorm(n=N, mean=0.0378, sd=0.8704) # IU potential outcome 
tau1 <- ATE # Hypothesized treatment effect difference
print(ATE)
Y1 <- Y0 - tau1 # treatment potential outcome for other treatment group
#Y2 <- Y0 + 0 # treatment potential outcome for other treatment group

significant.lm <- rep(NA, sims) # Empty object to count significant experiments 

#Inner loop to conduct experiments "sims" times over for each ATE
for (i in 1:sims){
  Z.sim <- sample(0:1, N, replace = T, prob = c(1/2, 1/2)) # Do a random assignment 
  Y.sim <- case_when(Z.sim == 0 ~ Y0, Z.sim == 1 ~ Y1) # Reveal outcomes according to assignment 

  fit.lm <- lm_robust(Y.sim ~ factor(Z.sim)) # Do analysis (Simple regression)
  
  p.lm <- summary(fit.lm)$coefficients[2,4] # Extract p-values  
  significant.lm[i] <- (p.lm <= alpha) # Determine significance according to p <= 0.05
}
  powers[j] <- mean(significant.lm) # store average success rate (power) for each N 
}

powers1 <- powers
cbind(possible.ate, powers1)
```

####Plot Results
```{r plot results}
png("Figures/power_support_followup_2.png")
plot(possible.ate, powers1, type = "b", main = "Power to Detect Treatment Effect Differences with Support Outcome", 
     xlab = "Possible ATE (Treatment Effect Difference)", ylab = "Power")
points(possible.ate, powers1, 
       pch = 19, col = "gray", bg = "gray", cex = 1.3)
abline(h = .8, lty = 2)
#abline(v = 1500, lty = 4)
dev.off()
```



#Redo with 3k

#####Set Parameters for Power Analysis
```{r establish parameters for power analysis}
alpha <- 0.05 # Standard significance level 
possible.ate <- seq(from=0.01, to=0.3, by=.01) # The ATEs we'll be considering 
N <- 3000 # Our sample size
sims <- 1000 # Number of simulated treatment allocations to conduct for each ATE 

powers <- rep(NA, length(possible.ate)) # Empty object to collect lm simulation estimates 
powers.diff <- rep(NA, length(possible.ate)) # Empty object to collect lm simulation estimates 
```


####Simulate Results for Treatment Effect Differences with Support Outcome
```{r loop of lms to simulate results}
#Loop to vary the ATE
for (j in 1:length(possible.ate)){ ATE <- possible.ate[j] # Pick the ATE

Y0 <- rnorm(n=N, mean=0.0378, sd=0.8704) # IU potential outcome 
tau1 <- ATE # Hypothesized treatment effect difference
print(ATE)
Y1 <- Y0 - tau1 # treatment potential outcome for other treatment group
Y2 <- Y0 - tau1 # treatment potential outcome for other treatment group

significant.lm <- rep(NA, sims) # Empty object to count significant experiments 

#Inner loop to conduct experiments "sims" times over for each ATE
for (i in 1:sims){
  Z.sim <- sample(0:2, N, replace = T, prob = c(1/3, 1/3, 1/3)) # Do a random assignment 
  Y.sim <- case_when(Z.sim == 0 ~ Y0, Z.sim == 1 ~ Y1, Z.sim == 2 ~ Y2) # Reveal outcomes according to assignment 
  #Z.sim <- if_else(Z.sim == 0, 0, 1)

  fit.lm <- lm_robust(Y.sim ~ factor(Z.sim)) # Do analysis (Simple regression)
  
  p.lm <- summary(fit.lm)$coefficients[2,4] # Extract p-values  
  significant.lm[i] <- (p.lm <= alpha) # Determine significance according to p <= 0.05
}
  powers[j] <- mean(significant.lm) # store average success rate (power) for each N 
}

powers2 <- powers
cbind(possible.ate, powers2)
```

####Plot Results
```{r plot results}
png("Figures/power_support_followup_2_3k.png")
plot(possible.ate, powers2, type = "b", main = "Power to Detect Treatment Effect Diff. with Support Outcome", 
     xlab = "Possible ATE (Treatment Effect Difference)", ylab = "Power")
points(possible.ate, powers2, 
       pch = 19, col = "gray", bg = "gray", cex = 1.3)
abline(h = .8, lty = 2)
#abline(v = 1500, lty = 4)
dev.off()
```




#Redo with SD simulations

#####Set Parameters for Power Analysis
```{r establish parameters for power analysis}
alpha <- 0.05 # Standard significance level 
possible.ate <- seq(from=0.01, to=0.2, by=.01) # The ATEs we'll be considering 
N <- 3000 # Our sample size
sims <- 1000 # Number of simulated treatment allocations to conduct for each ATE 

powers <- rep(NA, length(possible.ate)) # Empty object to collect lm simulation estimates 


possible.sd <- c(1, 1.1, 1.15, 1.3, 1.5)
powers_sd <- matrix(0, nrow = length(possible.ate), ncol = length(possible.sd))
```


####Simulate Results for Treatment Effect Differences with Support Outcome
```{r loop of lms to simulate results}

for (k in 1:length(possible.sd)){ SD <- possible.sd[k] # Pick the ATE
print(SD)

#Loop to vary the ATE
for (j in 1:length(possible.ate)){ ATE <- possible.ate[j] # Pick the ATE
print(ATE)

Y0 <- rnorm(n=N, mean=0.0378, sd=0.8704) # IU potential outcome 
tau1 <- ATE # Hypothesized treatment effect difference
Y1 <- rnorm(n=N, mean=0.0378 - tau1, sd=0.8704) # treatment potential outcome for other treatment group
Y2 <- rnorm(n=N, mean=0.0378 - tau1, sd=0.8704 * SD) # treatment potential outcome for other treatment group

significant.lm <- rep(NA, sims) # Empty object to count significant experiments 

#Inner loop to conduct experiments "sims" times over for each ATE
for (i in 1:sims){
  Z.sim <- sample(0:2, N, replace = T, prob = c(1/2, 1/4, 1/4)) # Do a random assignment 
  Y.sim <- case_when(Z.sim == 0 ~ Y0, Z.sim == 1 ~ Y1, Z.sim == 2 ~ Y2) # Reveal outcomes according to assignment 
  Z.sim <- if_else(Z.sim == 0, 0, 1)
  
  fit.lm <- lm_robust(Y.sim ~ factor(Z.sim)) # Do analysis (Simple regression)
  
  p.lm <- summary(fit.lm)$coefficients[2,4] # Extract p-values  
  significant.lm[i] <- (p.lm <= alpha) # Determine significance according to p <= 0.05
}
powers[j] <- mean(significant.lm) # store average success rate (power) for each N 
}
powers_sd[, k] <- powers
}

powers_sd <- powers_sd %>% as_tibble
colnames(powers_sd) <- possible.sd
powers_sd <- cbind(possible.ate, powers_sd)
powers_sd

saveRDS(powers_sd, "data/powers_sd_quarter.rds")

#powers2 <- powers
#cbind(possible.ate, powers2)
```

#Redo with SD simulations

#####Set Parameters for Power Analysis
```{r establish parameters for power analysis}
alpha <- 0.05 # Standard significance level 
possible.ate <- seq(from=0.01, to=0.2, by=.01) # The ATEs we'll be considering 
N <- 3000 # Our sample size
sims <- 10000 # Number of simulated treatment allocations to conduct for each ATE 

powers <- rep(NA, length(possible.ate)) # Empty object to collect lm simulation estimates 


possible.sd <- c(1, 1.1, 1.15, 1.3, 1.5)
powers_sd <- matrix(0, nrow = length(possible.ate), ncol = length(possible.sd))
```


####Simulate Results for Treatment Effect Differences with Support Outcome
```{r loop of lms to simulate results}

for (k in 1:length(possible.sd)){ SD <- possible.sd[k] # Pick the ATE
print(SD)

#Loop to vary the ATE
for (j in 1:length(possible.ate)){ ATE <- possible.ate[j] # Pick the ATE
print(ATE)

Y0 <- rnorm(n=N, mean=0.0378, sd=0.8704) # IU potential outcome 
tau1 <- ATE # Hypothesized treatment effect difference
Y1 <- rnorm(n=N, mean=0.0378 - tau1, sd=0.8704) # treatment potential outcome for other treatment group
Y2 <- rnorm(n=N, mean=0.0378 - tau1, sd=0.8704 * SD) # treatment potential outcome for other treatment group

significant.lm <- rep(NA, sims) # Empty object to count significant experiments 

#Inner loop to conduct experiments "sims" times over for each ATE
for (i in 1:sims){
  Z.sim <- sample(0:2, N, replace = T, prob = c(1/3, 1/3, 1/3)) # Do a random assignment 
  Y.sim <- case_when(Z.sim == 0 ~ Y0, Z.sim == 1 ~ Y1, Z.sim == 2 ~ Y2) # Reveal outcomes according to assignment 
  Z.sim <- if_else(Z.sim == 0, 0, 1)
  
  fit.lm <- lm_robust(Y.sim ~ factor(Z.sim)) # Do analysis (Simple regression)
  
  p.lm <- summary(fit.lm)$coefficients[2,4] # Extract p-values  
  significant.lm[i] <- (p.lm <= alpha) # Determine significance according to p <= 0.05
}
powers[j] <- mean(significant.lm) # store average success rate (power) for each N 
}
powers_sd[, k] <- powers
}

powers_sd <- powers_sd %>% as_tibble
colnames(powers_sd) <- possible.sd
powers_sd <- cbind(possible.ate, powers_sd)
powers_sd

saveRDS(powers_sd, "data/powers_sd_third.rds")

#powers2 <- powers
#cbind(possible.ate, powers2)
```